library(tidyr)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(readr)
library(data.table)
library(stargazer)
library(modelsummary)
library(fixest)
library(doBy)



bartik_df<-read.csv("dataset_1.csv")
EC_JTR_meet<-read.csv("dataset_2.csv")

##################################################################################################################################################################
##descriptive statistics EU DGs####################################################################################################################################################
##################################################################################################################################################################

# Figure A1 - DG

bartik_df_1 <- summaryBy(AST_female_ratio + AD_female_ratio ~ DG, FUN=c(mean), data=bartik_df)

bartik_df_1$DG<-as.factor(bartik_df_1$DG)

bartik_df_2 <-bartik_df_1

bartik_df_2$DG <- factor(bartik_df_2$DG,                                    # Factor levels in decreasing order
                         levels = bartik_df_2$DG[order(bartik_df_2$AD_female_ratio.mean, decreasing = TRUE)])

p_ad<-ggplot(aes(x=as.factor(DG), y=AD_female_ratio.mean), data = bartik_df_2) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()

p_ad1<-p_ad + labs(fill = "Gender")

p_ad2<-p_ad1 + xlab("") + ylab("")  + ggtitle("AD")+  theme(text = element_text(size = 25)) 

p_ad2

bartik_df_3 <-bartik_df_1

bartik_df_3$DG <- factor(bartik_df_3$DG,                                    # Factor levels in decreasing order
                         levels = bartik_df_3$DG[order(bartik_df_3$AST_female_ratio.mean, decreasing = TRUE)])

levels(bartik_df_3$DG)

p_ast<-ggplot(aes(x=as.factor(DG), y=AST_female_ratio.mean), data = bartik_df_3) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()

p_ast1<-p_ast + labs(fill = "Gender")

p_ast2<-p_ast1 + xlab("") + ylab("") + ggtitle("AST")  + theme(text = element_text(size = 25)) 

p_ast2

png("Figure_A1.png",width = 15, height = 15, units = 'in', res = 150)

grid.arrange(p_ad2, p_ast2, ncol = 2) 

dev.off()

# Figure A2 - Policy Area

bartik_df_pol <- summaryBy(AST_female_ratio + AD_female_ratio ~ policy_area, FUN=c(mean), data=bartik_df)

bartik_df_pol$policy_area<-as.factor(bartik_df_pol$policy_area)

bartik_df_pol1 <-bartik_df_pol

bartik_df_pol1$policy_area <- factor(bartik_df_pol1$policy_area,                                    # Factor levels in decreasing order
                                     levels = bartik_df_pol1$policy_area[order(bartik_df_pol1$AD_female_ratio.mean, decreasing = TRUE)])


p<-ggplot(aes(x=as.factor(policy_area), y=AD_female_ratio.mean), data = bartik_df_pol1) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()


P1<-p + labs(fill = "Gender")

P2<-P1 + xlab("") + ylab("")+ ggtitle("AD") + theme(text = element_text(size = 25)) 
P2


bartik_df_pol2 <-bartik_df_pol

bartik_df_pol2$policy_area <- factor(bartik_df_pol2$policy_area,                                    # Factor levels in decreasing order
                                     levels = bartik_df_pol2$policy_area[order(bartik_df_pol2$AST_female_ratio.mean, decreasing = TRUE)])


p<-ggplot(aes(x=as.factor(policy_area), y=AST_female_ratio.mean), data = bartik_df_pol2) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()


P1<-p + labs(fill = "Gender")

c2<-P1 + xlab("") + ylab("")+ ggtitle("AST") + theme(text = element_text(size = 22)) 
c2

png("Figure_A2.png",width = 15, height = 15, units = 'in', res = 150)

grid.arrange(c2, P2, ncol = 1) 

dev.off()

##################################################################################################################################################################
##Descriptive statistics IGs####################################################################################################################################################
##################################################################################################################################################################
bartik_df$Section[bartik_df$Section=="I - Professional consultancies/law firms/self-employed consultants"] <- "Consultancies"
bartik_df$Section[bartik_df$Section=="II - In-house lobbyists and trade/business/professional associations"] <- "In-house Lobbyists"
bartik_df$Section[bartik_df$Section=="III - Non-governmental organisations"] <- "NGOs"
bartik_df$Section[bartik_df$Section=="IV - Think tanks, research and academic institutions"] <- "Think Tanks"
bartik_df$Section[bartik_df$Section=="V - Organisations representing churches and religious communities"] <- "Religious"
bartik_df$Section[bartik_df$Section=="VI - Organisations representing local, regional and municipal authorities, other public or mixed entities, etc."] <- "Local and Regional"


bartik_df$gender_EU_n<-ifelse(bartik_df$gender_EU=="female",1,0)

mean <- function(x)base::mean(x, na.rm=TRUE)

# Figure A3 - Type

bartik_df_type <- summaryBy(gender_EU_n ~ Section, FUN=c(mean), data=bartik_df)

bartik_df_type$Section<-as.factor(bartik_df_type$Section)

bartik_df_type$Section <- factor(bartik_df_type$Section,                                    # Factor levels in decreasing order
                                 levels = bartik_df_type$Section[order(bartik_df_type$gender_EU_n.mean, decreasing = TRUE)])

png("Figure_A3.png",width = 15, height = 15, units = 'in', res = 500)

p<-ggplot(aes(x=as.factor(Section), y=gender_EU_n.mean), data = bartik_df_type) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()


P1<-p + labs(fill = "Gender")

P2<-P1 + xlab("") + ylab("") + theme(text = element_text(size = 30)) 
P2
dev.off()

# Figure A4 - DG

table(bartik_df$DG)

bartik_df_DG <- summaryBy(gender_EU_n ~ DG, FUN=c(mean), data=bartik_df)

bartik_df_DG$DG<-as.factor(bartik_df_DG$DG)

bartik_df_DG$DG <- factor(bartik_df_DG$DG,                                    # Factor levels in decreasing order
                          levels = bartik_df_DG$DG[order(bartik_df_DG$gender_EU_n.mean, decreasing = TRUE)])

png("Figure_A4.png",width = 15, height = 15, units = 'in', res = 500)

p<-ggplot(aes(x=as.factor(DG), y=gender_EU_n.mean), data = bartik_df_DG) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()


P1<-p + labs(fill = "Gender")

P2<-P1 + xlab("") + ylab("") + theme(text = element_text(size = 30)) 
P2
dev.off()


# Figure A5 - Policy

table(bartik_df$policy_area)

bartik_df_policy <- summaryBy(gender_EU_n ~ policy_area, FUN=c(mean), data=bartik_df)

bartik_df_policy$policy_area<-as.factor(bartik_df_policy$policy_area)

bartik_df_policy$policy_area <- factor(bartik_df_policy$policy_area,                                    # Factor levels in decreasing order
                                       levels = bartik_df_policy$policy_area[order(bartik_df_policy$gender_EU_n.mean, decreasing = TRUE)])

png("Figure_A5.png",width = 15, height = 15, units = 'in', res = 500)

p<-ggplot(aes(x=as.factor(policy_area), y=gender_EU_n.mean), data = bartik_df_policy) + stat_summary(fun = "mean", geom = "bar") +  coord_flip()


P1<-p + labs(fill = "Gender")

P2<-P1 + xlab("") + ylab("") + theme(text = element_text(size = 22)) 
P2
dev.off()

##################################################################################################################################################################
###DG Gender and IG Gender###################################################################################################################################################
##################################################################################################################################################################
run_models <- function(dep_var, output_file) {
  # Run the models
  model1 <- feols(as.formula(paste(dep_var, "~ 1 | 0 | gender_DG ~ bartik_diff")), data = bartik_df)
  model2 <- feols(as.formula(paste(dep_var, "~ Full.time.equivalent..FTE. + Overall.budget.Turnover..absolute.amount. | Section + Head.office.country | gender_DG ~ bartik_diff")), data = bartik_df)
  model3 <- feols(as.formula(paste(dep_var, "~ Full.time.equivalent..FTE. + Overall.budget.Turnover..absolute.amount. | Section + Head.office.country + policy_area | gender_DG ~ bartik_diff")), data = bartik_df)
  
  # Extract number of observations for each model
  num_obs <- c(
    nobs(model1),
    nobs(model2),
    nobs(model3)
  )
  
  # Create a data frame for additional rows
  add_rows_df <- tribble(
    ~term,                            ~Model1, ~Model2, ~Model3,
    "IG Type",                        "No",    "Yes",   "Yes",
    "IG Country",                     "No",    "Yes",   "Yes",
    "Policy Area",                    "No",    "No",    "Yes",
    "IG Covs",                        "No",    "Yes",   "Yes",
    "Number of Observations",         as.character(num_obs[1]), 
    as.character(num_obs[2]), 
    as.character(num_obs[3])
  )
  
  # Generate the model summary
  modelsummary(
    list(
      "Model 1" = model1,
      "Model 2" = model2,
      "Model 3" = model3
    ),
    output = output_file,
    coef_map = c("fit_gender_DG" = "Gender DG"),
    gof_omit = ".*",
    stars = TRUE,
    add_rows = add_rows_df,
    notes = "Significance levels: *** p < 0.01, ** p < 0.05, * p < 0.1"
  )
}

# Run models for different dependent variables
run_models("gender_EU_s", "Table_1.tex")
run_models("gender_avg_EP", "Table_A3.tex")
run_models("gender_avg_s", "Table_A4.tex")

##################################################################################################################################################################
##Regression EC Meetings - IG Rep ############################################################################################################################################
##################################################################################################################################################################

# Table 2 - Access

model0<- glm(meeting ~ gender_EU_s , data = EC_JTR_meet, family = "binomial")

model1<- glm(meeting ~ gender_EU_s + gender_DG, data = EC_JTR_meet, family = "binomial")

model2<- glm(meeting ~ gender_EU_s + gender_DG +Full.time.equivalent..FTE. + Overall.budget.Turnover..absolute.amount., data = EC_JTR_meet, family = "binomial")

model3<- glm(meeting ~ gender_EU_s+ gender_DG +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country)+as.factor(Section)    , data = EC_JTR_meet, family = "binomial")

model4<- glm(meeting ~ gender_EU_s  +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country)+as.factor(Section)+as.factor(DG)    , data = EC_JTR_meet, family = "binomial")

model5<- glm(meeting ~ gender_EU_s+ gender_DG +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country)+as.factor(Section)+as.factor(policy_area)    , data = EC_JTR_meet, family = "binomial")


stargazer(model0, model1, model2, model3,model4,model5,
          title= "EC Meetings: Meeting and Gender EU Rep",
          keep=c("gender_EU_s"),
          align=TRUE,
          out="Table_2.tex",
          type = 'latex',
          style = "qje", 
          omit.stat  = c("adj.rsq","aic", "chi2","ll"),
          add.lines=list(c("Gender DG","", "X", "X", "X", "","X"), c("IG Covs","", "", "X", "X", "X","X"), c("IG Country","", "", "", "X", "X","X"), c("IG Type","", "", "", "X", "X","X"), c("DG","", "", "", "", "X",""),c("Policy Area","", "", "", "", "","X")),
          covariate.labels = c("Gender EU Rep"),
          dep.var.labels  = "Meeting")


##Balance Tests

# Table A1 - Shifts

model1_bal  <- lm(AD_avg_leave_diff ~ gender_EU_s  , data = bartik_df)
summary(model1_bal)
model2_bal  <- lm(AD_avg_leave_diff ~ gender_EU_s  +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country) +as.factor(Section), data = bartik_df)
summary(model2_bal)
model3_bal  <- lm(AD_avg_leave_diff ~ gender_EU_s +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country) +as.factor(Section) +as.factor(policy_area), data = bartik_df)
summary(model3_bal)

stargazer(model1_bal,model2_bal,model3_bal,
          title= "Balance: Gender EU Representative and Gender DG Shifts",
          keep=c("gender_EU_s"),
          align=TRUE,
          out="Table_A1.tex",
          type = 'latex',
          style = "qje", 
          omit.stat  = c("adj.rsq","aic", "chi2","ll", "ser"),  
          add.lines=list(c("IG Covs", "", "X","X"), c("IG Country", "", "X","X"), c("IG Type", "", "X","X"), c("Policy Area", "", "","X")),
          covariate.labels = c("Gender EU Rep"),
          dep.var.labels  = "Gender DG Shifts")

# Table A2 - Initial Shares

model1_bal1  <- lm(AD_female_ratio_init ~ gender_EU_s  , data = bartik_df)
summary(model1_bal1)
model2_bal1 <- lm(AD_female_ratio_init ~ gender_EU_s  +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country) +as.factor(Section), data = bartik_df)
summary(model2_bal1)
model3_bal1  <- lm(AD_female_ratio_init ~ gender_EU_s +Full.time.equivalent..FTE. +Overall.budget.Turnover..absolute.amount. +as.factor(Head.office.country) +as.factor(Section) +as.factor(policy_area), data = bartik_df)
summary(model3_bal1)

stargazer(model1_bal1,model2_bal1,model3_bal1,
          title= "Balance: Gender EU Representative and Gender DG Shares",
          keep=c("gender_EU_s"),
          # title="Results",
          align=TRUE,
          out="Table_A2.tex",
          type = 'latex',
          style = "qje", 
          omit.stat  = c("adj.rsq","aic", "chi2","ll", "ser"),  
          add.lines=list(c("IG Covs", "", "X","X"), c("IG Country", "", "X","X"), c("IG Type", "", "X","X"), c("Policy Area", "", "","X")),
          covariate.labels = c("Gender EU Rep"),
          dep.var.labels  = "Gender DG Shares")


